

Class XML Parser

	inputs = String XML

	attributes
		String token_type {'atomic'|'open'|'close'}
		String token_name yString
		String token_parent yString
		String token_full_name yString
		String token_attributes[] xString
		String token_value yString
		String token_property_names[] ::= ('token_type', 'token_name', 'token_parent', 'token_full_name', 'token_attributes', 'token_value', 'token_property_names', 'token_attribute_names')
		String token_attribute_names[]

	definitions
		tChar	::= (>,<,/)
		yString ::= any number of printable characters excluding tChar
		xString ::= yString, excluding spaces

	methods (interface)
		get_token		(String XML) :String
		get_attribute		(String hashArray[]) :String hashArray[]
		get_value		(String hashArray[]) :String hashArray[]
		get_name		(String hashArray[]) :String hashArray[]

	methods (definition)
		get_token		::= get_atomic_token ^ get_open_token ^ get_close_token
		get_name		::= FIND (^[ ]*'<'xString([ ]+|'>'|'/>'))
						SET this.token_name = xString
						REMOVE MATCH(ed) String from XML String
		get_attribute		::= WHILE token String still has more attributes
						// find key="value", update attributes hash and attribute names array
						FIND ^[ ]+xString[ ]*'='[ ]*'"'yString'"'
						SET this.token_attributes.xString = yString
						SET this.token_attribute_names[#LAST] = xString
						remove current attribute from token String
					    IF this.token_type = ('atomic'|'close') THEN END parse
		get_value		::= FIND (^yString)
						SET this.token_value = yString
						REMOVE MATCH(ed) String from XML String
		get_atomic_token	::= FIND (^[ ]*'<'yString'/>')
						SET this.token_type = 'atomic'
						get_name
						get_attribute
						REMOVE MATCH(ed) String from XML String
		get_open_token		::= FIND (^[ ]*'<'yString'>')
						SET this.token_type = 'open'
						get_name
						get_attribute
						get_value
						REMOVE MATCH(ed) String from XML String
		get_close_token		::= FIND (^[ ]*'</'yString'>')
						SET this.token_type = 'close'
						get_name
						REMOVE MATCH(ed) String from XML String


	logic
	(ARRAY APPROACH)	// Implementation using hash arrays.

		Stack parents
		// A Stack is an Array in which the first item in
		// is always the last item out.
		// The parents Stack is an Array that holds the list
		// of parent XML tokens.  When an Opening tag is
		// encountered, a token is added to the Stack.  After
		// the Closing tags have been found the element is
		// POP(ed) from the Stack.  The Stack is not affected
		// by Atomic tokens since they Open/Close immediately.

		String parents_list = 'root_array'
		// parents_list is a String that records the path of the current parent
		// element, starting at root_array, growing to root_array.elm1.elmx and
		// returning to root_array.  The String grows as you descend into nested
		// elements and shrinks as you ascend back towards the root, after each
		// element's closing tag.


		Array array
		array['root_array'] = parse

		IF array['root_array']['token_type'] = 'atomic'
			RETURN array
			// root array is atomic, return now, no more work to do
		ELSE
			current_array = parse
			// get next token

			IF current_array['token_type'] = 'close'
				RETURN array
				// closing tags found for root array, no more work to do
			END IF

			DO	// Loop to scroll through all elements of XML document

				IF current_array['token_type'] = 'atomic'
					// current item is an Atomic tag, e.g. <myElement name='atomic element' />
					current_array['parent'] = parents[#LAST]
					// Set this token's parent to be the last entry in the Stack(Array) of parents
					current_array['full_name'] = parents_list + '.' + current_array['token_name']
					// Set this token's full name to the String parents_list plus '.' plus its own name.
					// parents_list itself is NOT changed: an Atomic token opens and closes
					// immediately, so it never becomes the parent of another token.
					array[current_array['full_name']] = current_array
					// Add this array to Super Array of all tokens, Hashed by its fully qualified name, e.g. "a.b.c.d"

				ELSIF current_array['token_type'] = 'close'
					// current item is a closing tag, e.g. </myElement>
					parents_list = parents_list minus the trailing ('.' + this token's name)
					// Remove current token (and its '.' separator) from the end of the parents_list String
					POP parents
					// Remove the name of this token from the end of the Stack(Array) of parents

				ELSE
					// current item is an opening tag, e.g. <myElement name='open element'>
					current_array['parent'] = parents[#LAST]
					// Set this token's parent to be the last entry in the Stack(Array) of parents
					current_array['full_name'] = parents_list + '.' + current_array['token_name']
					// Set this token's full name to the String parents_list plus '.' plus its own name
					array[current_array['full_name']] = current_array
					// Add this array to Super Array of all tokens, Hashed by its fully qualified name, e.g. "a.b.c.d"
					PUSH parents current_array['token_name']
					// Add the name of this token to the end of the Stack(Array) of parents
					parents_list = current_array['full_name']
					// Descend: this token is now the current parent, so parents_list grows by
					// '.' + token name.  Only opening tags extend parents_list; the matching
					// closing tag shrinks it again.
				END IF
			WHILE current_array = parse
		END IF

		RETURN array



	notes
		1). After each method call, the Match(ed) text is returned and at the
		    same time, removed from the original XML string.
		2). Each call to parse the XML string RETURNs an Array which contains
		    either the entire Hash of tokens or the current token only.
		3). Arrays RETURN(ed) will be composite, i.e. nested Arrays will be
		    expressed as attributes of the parent Array
		5). Arrays will be hashed by name rather than number and nested Arrays
		    will be expressed as individual Array items with a parent attribute
		    that points at the parent Array item.  As such all tokens are
		    elements/items in the Super Array of all tokens.  However, every
		    token will have a property, PARENT.  If this property is 'root_array'
		    then this is the first element/token in the XML document.  If it is
		    not 'root_array' then it will indicate the full name of the element/token
		    that is its parent.
		6). By Hashing on fully qualified name rather than simple name, it becomes
		    possible to maintain several identical tokens and/or attributes that
		    occur at different nodes within the XML document in the same Hash Array.


